# Functions for systematic component of DGP 

DGP1 <- function(N) {
  
  Y_err <- 2
  corr <- 1
  
  # Draw the true X's
  Z1_mean <- 1
  Z2_mean <- 2
  Z1 <- rpois(N, Z1_mean)
  Z2 <- Z1*corr + rpois(N, Z2_mean)
  Z <- cbind(Z1, Z2)
  
  b0 <- 2
  b1 <- -1
  b2 <- 6
  
  return(list(Z = Z, b0 = b0, b1 = b1, b2 = b2, Y_err = Y_err))
  
}

DGP2 <- function(N) {
  
  Y_err <- 3
  corr <- 3
  
  # Draw the true X's
  Z1_mean <- 70
  Z2_mean <- 45
  Z1 <- rpois(N, Z1_mean)
  Z2 <- Z1*corr + rpois(N, Z2_mean)
  Z <- cbind(Z1, Z2)
  
  b0 <- 2
  b1 <- -1
  b2 <- 3
  
  return(list(Z = Z, b0 = b0, b1 = b1, b2 = b2, Y_err = Y_err))
  
}

DGP3 <- function(N) {
    
    Y_err <- 2
    corr <- 2
    
    # Draw the true X's
    Z1_mean <- 7
    Z2_mean <- 9
    Z1 <- rpois(N, Z1_mean)
    Z2 <- Z1*corr + rpois(N, Z2_mean)
    Z <- cbind(Z1, Z2)
    
    b0 <- 10
    b1 <- 12
    b2 <- -3
    
    return(list(Z = Z, b0 = b0, b1 = b1, b2 = b2, Y_err = Y_err))
    
}


DGP_random <- function(fixed_data, X_err_vec,  N) {

  b0 <- fixed_data$b0
  b1 <- fixed_data$b1
  b2 <- fixed_data$b2
  Y_err <- fixed_data$Y_err
  Z1 <- fixed_data$Z[, 1]
  Z2 <- fixed_data$Z[, 2]
  
  Y_res <- rnorm(N, mean = 0, sd = Y_err)
  Y <- b0 + b1*Z1 + b2*Z2 +  Y_res
  
  V1 <- rnorm(N, 0, X_err_vec[1])
  V2 <- rnorm(N, 0, X_err_vec[2])
  X1 <-  fixed_data$Z[, 1] + V1
  X2 <- fixed_data$Z[, 2] + V2
  X <- cbind(X1, X2)
  
  return(list(Y = Y, X = X))
  
}

# Function to summarise simuulation output

summarizeSims <- function(sims, fixed_data, N, X_err_vec) {

  b_df <- do.call(rbind, lapply(1:length(sims), FUN = function(i) sims[[i]]$b))
  b <- apply(b_df, 2, mean)
  names(b) <- c('b0_est', 'b1_est', 'b2_est')
  b_se_naive_df <- do.call(rbind, lapply(1:length(sims), FUN = function(i) sqrt(diag(sims[[i]]$b_vcov))))
  b_se_naive <- apply(b_se_naive_df, 2, mean)
  names(b_se_naive) <- c('b0_se_naive', 'b1_se_naive', 'b2_se_naive')
  
  #b_se_est 
  b_se_est_df <- do.call(rbind, lapply(1:length(sims), FUN = function(i) sqrt(diag(sims[[i]]$b_vcov))))
  b_se_est <- apply(b_se_est_df, 2, mean)
  names(b_se_est) <- c('b0_se_est', 'b1_se_est', 'b2_se_est')
  
  #b_se_true
  b_se_true <- apply(b_df, 2, sd)
  names(b_se_true) <- c('b0_se_true', 'b1_se_true', 'b2_se_true')
  
  #
  beta_tilde_df <- do.call(rbind, lapply(1:length(sims), FUN = function(i) sims[[i]]$beta_tilde))
  beta_tilde <- apply(beta_tilde_df, 2, mean)
  names(beta_tilde) <- c('b0_est_tilde', 'b1_est_tilde', 'b2_est_tilde')
  beta_tilde_se_df <- do.call(rbind, lapply(1:length(sims), FUN = function(i) sqrt(diag(sims[[i]]$beta_tilde_vcov))))
  beta_tilde_se_est <- apply(beta_tilde_se_df, 2, mean)
  names(beta_tilde_se_est) <- c('b0_se_est_tilde', 'b1_se_est_tilde', 'b2_se_est_tilde')
  beta_tilde_se_true <- apply(beta_tilde_df, 2, sd)
  names(beta_tilde_se_true) <- c('b0_se_true_tilde', 'b1_se_true_tilde', 'b2_se_true_tilde')
  
  #params
  S1 <- X_err_vec[1]
  S2 <- X_err_vec[2]
  N <- N
  b0 <- fixed_data$b0
  b1 <- fixed_data$b1
  b2 <- fixed_data$b2
  sum_sims <- c(S1, S2, N, b0, b1, b2, 
                b, b_se_naive, b_se_est, b_se_true, beta_tilde, beta_tilde_se_est, beta_tilde_se_true)
  names(sum_sims)[c(1:6)] <- c('S1', 'S2', 'N', 'b0_true', 'b1_true', 'b2_true')
  
  return(sum_sims)
}

# Function to run multiple simulations 

simulationFn <- function(DGP = 1, N, X_err_vec, bootstrap_var, nsims = 1000) {
  
  # Generate data  according to chosen DGP 
    if(DGP == 1){
  
      fixed_data <- DGP1(N = N)
      
    } else {
      
      if(DGP == 2){
        
        fixed_data <- DGP2(N = N)
        
      }else{
          
          if(DGP == 3){
              
               fixed_data <- DGP3(N = N)              
          }
      }
    }
    
    # Run simulations 
    sims <- lapply(1:nsims, FUN = function(i) {
      
      if(i %in% seq(1, nsims, 50)){print(i)}
      
      random_data <- DGP_random(fixed_data = fixed_data, X_err_vec = X_err_vec, N = N)
      
      Y_err <- fixed_data$Y_err

      lmdp_random_data <- as.data.frame(rbind(c(Y_err, c(X_err_vec[1], X_err_vec[2])), cbind(Y = random_data$Y, random_data$X)))
      
      return(lmdp(Y ~ X1 + X2, data = lmdp_random_data))
    })
    
    summary_sims <- summarizeSims(sims, fixed_data = fixed_data, N = N, X_err_vec = X_err_vec)
    
  
  return(list(summary_sims = summary_sims))
}
